#!/usr/bin/env perl
use warnings;
use strict;

#
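# Computes the mean and sample variance of the ratings of each user or
# movie (both use a similar input format).
# Reads input from standard input.
# Input format is assumed to be like training_set.txt: an "id:" header
# line followed by "other_id,rating,..." lines belonging to that id.
# Output format is one line per id (a user id or a movie id):
# id num_ratings ratings_mean ratings_var
# ...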
#

# Print one line of summary statistics for a single id.
#
# Arguments:
#   $id      - the user or movie id the ratings belong to
#   @ratings - the numeric ratings collected for that id (may be empty)
#
# Output (to the currently selected filehandle, STDOUT by default):
#   "$id $n $mean $var\n", where $n is the number of ratings, $mean is
#   their arithmetic mean and $var is the sample variance (n - 1
#   denominator). The variance is reported as 0 when there are fewer than
#   two ratings, and the mean is reported as 0 when there are no ratings.
#
# Nothing is printed when neither an id nor any ratings were supplied
# (for example when the input was empty).
sub print_stats {
  my ($id, @ratings) = @_;
  my $n = @ratings;

  # No id and no ratings: there is no row to report.
  return if !defined($id) && $n == 0;

  # Total up the ratings.
  my $ratings_sum = 0.0;
  $ratings_sum += $_ for @ratings;

  # Guard the division: an id header with no rating lines gives $n == 0.
  my $ratings_mean = $n > 0 ? $ratings_sum / $n : 0.0;

  # Sample variance needs at least two observations; otherwise report 0.
  my $ratings_var = 0.0;
  if ($n > 1) {
    $ratings_var += ($_ - $ratings_mean)**2 for @ratings;
    $ratings_var /= $n - 1;
  }

  print "$id $n $ratings_mean $ratings_var\n";
}

# Main loop: group rating lines under the most recent "id:" header line and
# print one statistics line per id once its ratings are complete.
my $id;
my @ratings;
while (my $line = <STDIN>) {
  if ($line =~ /^\s*(\d+):/) {
    # A new header: flush the statistics of the previous id, if any.
    # Anchoring at the start keeps a colon later in a rating line (e.g. in
    # a timestamp) from being mistaken for a header.
    print_stats($id, @ratings) if defined($id);
    $id = int($1);
    @ratings = ();
  } elsif ($line =~ /\d+,(\d+),.*/) {
    # A rating with no preceding header cannot be attributed to any id;
    # fail loudly instead of silently discarding it.
    defined($id) or die "rating line before any id header: $line";
    push(@ratings, $1);
  } else {
    die "unexpected line format $line";
  }
}
# Flush the final id. With empty input no header was seen, so there is
# nothing to report.
print_stats($id, @ratings) if defined($id);
# Copyright (c) 2009 John Lees-Miller
# 
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
# 
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

